{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "kTHK_zzU9VDZ"
   },
   "source": [
    "# Partage de modèles pré-entraînés (TensorFlow)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "o7yACkWt9VDb"
   },
   "source": [
    "Installez la bibliothèque 🤗 Transformers pour exécuter ce *notebook*."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "jqz4b3j-9VDc"
   },
   "outputs": [],
   "source": [
    "!pip install datasets transformers[sentencepiece]\n",
    "!apt install git-lfs"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "EnJaBW3F9VDd"
   },
   "source": [
    "Vous aurez besoin de configurer git, adaptez votre email et votre nom dans la cellule suivante."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "8WVsxebR9VDe"
   },
   "outputs": [],
   "source": [
    "!git config --global user.email \"you@example.com\"\n",
    "!git config --global user.name \"Your Name\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "9BIXGjdV9VDf"
   },
   "source": [
    "Vous devrez également être connecté au Hub d'Hugging Face. Exécutez ce qui suit et entrez vos informations d'identification."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "fy3htsL-9VDg"
   },
   "outputs": [],
   "source": [
    "from huggingface_hub import notebook_login\n",
    "\n",
    "notebook_login()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "c8dfr6V89VDg"
   },
   "outputs": [],
   "source": [
    "from transformers import PushToHubCallback\n",
    "\n",
    "callback = PushToHubCallback(\n",
    "    \"camembert-base-finetuned-paws-x\", save_strategy=\"epoch\", tokenizer=tokenizer\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "I1fUmjLX9VDh"
   },
   "outputs": [],
   "source": [
    "from transformers import TFAutoModelForMaskedLM, AutoTokenizer\n",
    "\n",
    "checkpoint = \"camembert-base\"\n",
    "\n",
    "model = TFAutoModelForMaskedLM.from_pretrained(checkpoint)\n",
    "tokenizer = AutoTokenizer.from_pretrained(checkpoint)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "97pRWpyo9VDi"
   },
   "outputs": [],
   "source": [
    "model.push_to_hub(\"dummy-model\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "uoxrZuLy9VDi"
   },
   "outputs": [],
   "source": [
    "tokenizer.push_to_hub(\"dummy-model\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "ewRTPTvd9VDk"
   },
   "outputs": [],
   "source": [
    "tokenizer.push_to_hub(\"dummy-model\", organization=\"huggingface\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "YlsxkELo9VDk"
   },
   "outputs": [],
   "source": [
    "tokenizer.push_to_hub(\"dummy-model\", organization=\"huggingface\", use_auth_token=\"<TOKEN>\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "429P-xUG9VDl"
   },
   "outputs": [],
   "source": [
    "from huggingface_hub import (\n",
    "    # Gestion de l'utilisateur\n",
    "    login,\n",
    "    logout,\n",
    "    whoami,\n",
    "\n",
    "    # Création et gestion du dépôt\n",
    "    create_repo,\n",
    "    delete_repo,\n",
    "    update_repo_visibility,\n",
    "\n",
    "    # Quelques méthodes pour récupérer/changer des informations sur le contenu\n",
    "    list_models,\n",
    "    list_datasets,\n",
    "    list_metrics,\n",
    "    list_repo_files,\n",
    "    upload_file,\n",
    "    delete_file,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "KqfBiUKi9VDl"
   },
   "outputs": [],
   "source": [
    "from huggingface_hub import create_repo\n",
    "\n",
    "create_repo(\"dummy-model\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "53Ybj6N39VDm"
   },
   "outputs": [],
   "source": [
    "from huggingface_hub import create_repo\n",
    "\n",
    "create_repo(\"dummy-model\", organization=\"huggingface\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "DwcAm34i9VDm"
   },
   "outputs": [],
   "source": [
    "from huggingface_hub import upload_file\n",
    "\n",
    "upload_file(\n",
    "    \"<path_to_file>/config.json\",\n",
    "    path_in_repo=\"config.json\",\n",
    "    repo_id=\"<namespace>/dummy-model\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "EvqW-UYB9VDm"
   },
   "outputs": [],
   "source": [
    "from huggingface_hub import Repository\n",
    "\n",
    "repo = Repository(\"<path_to_dummy_folder>\", clone_from=\"<namespace>/dummy-model\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "vGQjKdg49VDm"
   },
   "outputs": [],
   "source": [
    "repo.git_pull()\n",
    "repo.git_add()\n",
    "repo.git_commit()\n",
    "repo.git_push()\n",
    "repo.git_tag()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "ciz8xZiw9VDn"
   },
   "outputs": [],
   "source": [
    "repo.git_pull()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "5ex39L0Y9VDn"
   },
   "outputs": [],
   "source": [
    "model.save_pretrained(\"<path_to_dummy_folder>\")\n",
    "tokenizer.save_pretrained(\"<path_to_dummy_folder>\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "HvPQc_1c9VDn"
   },
   "outputs": [],
   "source": [
    "repo.git_add()\n",
    "repo.git_commit(\"Add model and tokenizer files\")\n",
    "repo.git_push()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "LJ5Eu1Qz9VDn"
   },
   "outputs": [],
   "source": [
    "from transformers import TFAutoModelForMaskedLM, AutoTokenizer\n",
    "\n",
    "checkpoint = \"camembert-base\"\n",
    "\n",
    "model = TFAutoModelForMaskedLM.from_pretrained(checkpoint)\n",
    "tokenizer = AutoTokenizer.from_pretrained(checkpoint)\n",
    "\n",
    "# Faites ce que vous voulez avec le modèle, entraînez-le, finetunez-le...\n",
    "\n",
    "model.save_pretrained(\"<path_to_dummy_folder>\")\n",
    "tokenizer.save_pretrained(\"<path_to_dummy_folder>\")"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
